import os

import numpy as np

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# This script parses the log files which contain
# the raw data and translates them to a more convenient
# format.
# Make sure that the log file
#   (a) starts with a blank line
#   (b) has an end-of-file indicator "E" on a separate line
#   (c) has the pulse shape parameters appended after the "E"
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #


# # # # # # # # # # # #
# # # P A R A M S # # #
# # # # # # # # # # # #


# Set source filename
path = "data/"
fname = "TR75.log"                  # default, used if the prompt is left empty

# User-defined input. An empty answer keeps the default filename above.
# NOTE: the prompt is executed whenever this module is loaded.
fname = raw_input("Enter filename: ") or fname

# Set output filename: same base name as the input, extension replaced by
# ".npz". os.path.splitext copes with extensions of any length (or none),
# whereas slicing off the last three characters only works for e.g. ".log".
outname = os.path.splitext(fname)[0] + ".npz"

# Some file format options (these will be searched for by the parser)
param_delimiter = ", "              # separates "name=value" pairs in a trial header
pulse_res_delimiter = "\t"          # separates pulse and resistance in a write line
line_delimiter = "\n"               # linux: "\n"; Windows: "\r\n"
eof_delimiter = "E\n"               # linux: "E\n"; Windows: "E\r\n"

# try to determine EOL automatically?
# If True, line_delimiter and eof_delimiter above are overwritten by
# auto_end_of_line() based on the first line of the file.
AUTO_EOL = True


# # # # # # # # # # # # # # # # # # #
# # # E N D   O F   P A R A M S # # #
# # # # # # # # # # # # # # # # # # #

# Internal parser state: names and detected types ('int'/'float') of the
# trial parameters. Both stay None until extract_params() has seen its first
# header line; afterwards they serve as the reference for later headers.
_param_name, _param_dtype = None, None


def load_file(fn):
    """Read the file `fn` and return its content as a list of lines.

    The lines keep their trailing newline characters; the rest of the parser
    relies on that when it compares lines against `line_delimiter` and
    `eof_delimiter`.

    Parameters:
        fn: path of the file to read.

    Returns:
        list of str, one entry per line of the file.

    Raises:
        IOError / OSError: if the file cannot be opened or read. A short
        message is printed and the original exception is re-raised unchanged.
    """
    print(" > Open file '%s'." % fn)
    try:
        # 'with' guarantees the handle is closed even if reading fails.
        with open(fn) as f:
            # strip('\r') only removes carriage returns at the very start or
            # end of a line; a "\r\n" ending is left intact because the line
            # ends in "\n".
            data = [l.strip('\r') for l in f.readlines()]
    except (IOError, OSError):
        print(" > Error opening file %s" % fn)
        raise
    print(" > Successfully read %d lines." % (len(data)))
    return data

def auto_end_of_line(data):
    """Detect the end-of-line convention from the first line of `data`.

    The first line is expected to be blank, i.e. to consist of the line
    terminator only. Depending on whether it is "\\n" or "\\r\\n", the
    module-level `line_delimiter` and `eof_delimiter` are set accordingly.

    Raises:
        Exception: if the first line is neither "\\n" nor "\\r\\n".
    """
    global line_delimiter, eof_delimiter
    # linux first, then windows
    for eol in ("\n", "\r\n"):
        if data[0] == eol:
            line_delimiter = eol
            eof_delimiter = "E" + eol
            return
    raise Exception("Auto detection failed.")


def make_trial_index(data):
    """Build a lookup table from trial number to header position.

    A trial header is a line beginning with 'S='; the trial number is the
    integer between 'S=' and the first occurrence of `param_delimiter`.
    Scanning stops at the first line that begins with `eof_delimiter`.

    Returns:
        dict mapping trial number (int) to the index of its header in `data`.
    """
    marker = "S="
    index = {}
    for pos, line in enumerate(data):
        if line.startswith(marker):
            number_end = line.find(param_delimiter)
            number = int(line[len(marker):number_end])
            index[number] = pos
        if line.startswith(eof_delimiter):
            break
    return index


def find_trial_start(idx, data):
    """Return the position of the header line of trial `idx`.

    The position is looked up in the module-level `trial_dict`, which the
    main program fills via make_trial_index(). `data` is not used; it is
    kept in the signature for the existing callers.

    Raises:
        EOFError: if there is no trial with number `idx`. The main loop uses
        this as its stop signal.
    """
    try:
        return trial_dict[idx]
    except KeyError:
        # Only a missing trial means "end of file"; any other error (e.g. an
        # uninitialised trial_dict) must not be disguised as EOF.
        raise EOFError("Reached end of file!")


def determine_data_structure(data):
    """Determine the layout of a trial by inspecting trial 1.

    Starting at the header of trial 1, the lines up to the next blank line
    are counted as write pulses. After that, the first line that parses as a
    float marks the start of the read values, which are again counted up to
    the next blank line. If the next trial header (a line starting with "S")
    or the end-of-file marker comes first, the trial has no read values.

    Returns:
        (offset_write, num_write, offset_read, num_read), where the offsets
        are line counts relative to the trial's header line.

    Raises:
        EOFError: propagated from find_trial_start() if trial 1 is missing.
    """
    i0 = find_trial_start(1, data)
    n_lines = len(data)
    # Both a blank line and the end-of-file marker terminate a block of values.
    terminators = (line_delimiter, eof_delimiter)

    # Write pulses directly follow the header line.
    offset_write = 1
    num_write = 0
    i = i0 + offset_write
    while i < n_lines and data[i] not in terminators:
        num_write += 1
        i += 1

    # Skip ahead to the first read value, if there is one.
    has_read = False
    while i < n_lines and data[i] != eof_delimiter:
        line = data[i]
        if line.startswith("S"):
            # Next trial header reached: this trial has no read values.
            break
        try:
            # float() ignores surrounding whitespace, including the newline.
            float(line)
        except ValueError:
            i += 1
        else:
            has_read = True
            break
    offset_read = i - i0

    num_read = 0
    if has_read:
        while i < n_lines and data[i] not in terminators:
            num_read += 1
            i += 1
    print(" > #write pulses = %d,  #read pulses = %d." % (num_write, num_read))
    return offset_write, num_write, offset_read, num_read


def extract_params(line):
    """Parse the parameter header of a trial (the line starting with 'S=').

    The line is cut at `line_delimiter`, split at `param_delimiter`, and each
    piece is split into name and value at '='. A value is stored as int if
    it parses as one, otherwise as float.

    On the first call the parameter names and types are remembered in the
    module-level `_param_name` / `_param_dtype` and printed. Every later call
    must yield exactly the same names and types.

    Returns:
        list of the parameter values, in the order they appear in the line.

    Raises:
        ValueError: if a piece is not of the form name=value or the value is
            not a number.
        AssertionError: if names or types differ from those of the first call
            (including a different number of parameters).
    """
    global _param_name, _param_dtype
    # Cut off the line terminator; keep the whole line if there is none
    # (find() returns -1 then, which would otherwise drop the last character).
    end = line.find(line_delimiter)
    header = line if end < 0 else line[:end]
    # extract everything
    name, val, dtype = [], [], []
    for nv in header.split(param_delimiter):
        n, v = nv.split('=')
        try:
            v = int(v)
            dt = 'int'
        except ValueError:
            v = float(v)
            dt = 'float'
        name.append(n)
        val.append(v)
        dtype.append(dt)
    # check consistent format
    if _param_name is None:
        _param_name = name
        _param_dtype = dtype
        print(" > Parameters: "
              + "".join("%s (%s), " % (n, dt) for n, dt in zip(name, dtype)))
    elif name != _param_name or dtype != _param_dtype:
        # Whole-list comparison also catches a differing number of
        # parameters. Raised explicitly so the check survives 'python -O'.
        raise AssertionError(
            "Inconsistent trial header: expected %s, got %s."
            % (list(zip(_param_name, _param_dtype)), list(zip(name, dtype))))
    return val



def read_trial(idx, data):
    """Read and return the data of an individual trial."""
    from sys import stdout
    print "\b.",; stdout.flush()
    i = find_trial_start(idx, data)
    # get params
    param_val = extract_params(data[i])
    # get write pulses and resistances
    write_pulse, write_resist = np.zeros(num_write, dtype=int), np.zeros(num_write, dtype=float)
    for n,line in enumerate(data[i+offset_write:i+offset_write+num_write]):
        p,r = line[0:line.find(line_delimiter)].split(pulse_res_delimiter)
        write_pulse[n] = int(p)
        write_resist[n] = float(r)
    # get read resistances
    read_resist = np.zeros(num_read, dtype=float)
    for n,line in enumerate(data[i+offset_read:i+offset_read+num_read]):
        r = line[0:line.find(line_delimiter)]
        read_resist[n] = float(r)
    return param_val, write_pulse, write_resist, read_resist


# # # # # # # # # # # # # # # # # # # #
# # # # # M A I N   P R O G R A M # # #
# # # # # # # # # # # # # # # # # # # #

if __name__ == '__main__':
    # NOTE: trial_dict, offset_write, num_write, offset_read and num_read are
    # assigned at module level here on purpose; find_trial_start() and
    # read_trial() read them as globals.
    data = load_file(path + fname)          # Load the data from file
    if AUTO_EOL:
        auto_end_of_line(data)
    print(" > Generate index.")
    trial_dict = make_trial_index(data)     # find trials in the data
    # determine the data structure within a trial
    offset_write, num_write, offset_read, num_read = determine_data_structure(data)
    read_trial(1, data)                # Dummy read to determine the names of existing parameters
    # Init all the data arrays: one list per parameter name
    param = dict((name, []) for name in _param_name)
    write_pulse = []
    write_resist = []
    read_resist = []
    # Read data
    # Idea: call 'read_trial' and store the data, until an 'EOFError' is raised.
    # Any other exception is a real error and simply propagates.
    idx = 1
    print(" > Reading data:")
    while True:
        try:
            pval, wp, wr, rr = read_trial(idx, data)
        except EOFError:
            print("\n > Done. %d trials read." % (idx - 1))
            break
        for pname, v in zip(_param_name, pval):
            param[pname].append(v)
        write_pulse.append(wp)
        write_resist.append(wr)
        read_resist.append(rr)
        idx += 1

    # transform to more convenient np.ndarrays
    for k in param:
        param[k] = np.array(param[k])
    write_pulse = np.array(write_pulse)
    write_resist = np.array(write_resist)
    read_resist = np.array(read_resist)

    # save to .npz by putting everything into a large dictionary
    kwargs = dict(
        param=param,
        write_pulse=write_pulse,
        write_resist=write_resist,
        read_resist=read_resist,
        num_write=num_write,
        num_read=num_read,
        param_name=_param_name,
        param_dtype=_param_dtype,
        )

    outfile = path + outname
    print(" > Save data to '%s'." % outfile)
    np.savez(outfile, **kwargs)
